# -*- coding: utf-8 -*-
"""
Created on Mon Mar 23 16:55:28 2020

@author: Administrator
"""

import datetime
import random
import re
import time
import urllib.request

import openpyxl
from bs4 import BeautifulSoup

# Workbook that accumulates rows from both sites (path unchanged from original).
EXCEL_PATH = 'E:/数据/药材网/日/天药合并表.xlsx'

# Browser User-Agent sent with every request so the sites serve normal pages.
USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
              "(KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36")

# Hour of day (24h, local time, zero-padded as strftime("%H") yields) at which
# the daily collection runs.
COLLECT_HOUR = "10"

# Row-extraction regex for the yt1998.com JSON-ish payload; compiled once
# instead of on every entry of every page.
_YT_ROW_RE = re.compile(
    r'"yueduibi":"<span.*?>(.*?)","igid":"(.*?)","yesterday":"<span.*?>(.*?)",'
    r'"chandi":"(.*?)","shichang":"(.*?)","pri":"(.*?)","dtm":"(.*?)",'
    r'"market":"(.*?)","zhouduibi":"<span.*?>(.*?)","jiduibi":"<span.*?>(.*?)",'
    r'"nianduibi":"<span.*?>(.*?)","zoushi":"(.*?)","ycnam":"(.*?)","guige":"(.*?)"'
)


def _fetch_html(url):
    """Download *url* with the browser User-Agent and return the raw bytes."""
    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    return urllib.request.urlopen(req).read()


def _scrape_tiandi(collect_date):
    """Scrape price rows from zyctd.com (天地药材网).

    Args:
        collect_date: 'YYYY-MM-DD' string stamped onto every row.

    Returns:
        List of 10-element rows: name, spec, origin (w9 title text), market,
        price, week/month/year change, collection date, source label.
    """
    rows = []
    for page in range(136):
        # Random pause between pages so requests are not fired back-to-back.
        # (The original computed this value but forgot to sleep on it.)
        time.sleep(random.uniform(1, 3))
        if page == 0:
            url = "https://www.zyctd.com/jiage/1-0-0.html"
        else:
            url = "https://www.zyctd.com/jiage/1-0-0-" + str(page + 1) + ".html"
        # Explicit parser avoids the bs4 "no parser specified" warning and
        # environment-dependent parser selection.
        soup = BeautifulSoup(_fetch_html(url), "html.parser")
        table = soup.findAll("ul", class_="priceTableRows")
        # chunks[0] is the preamble before the first w1 span; each later
        # chunk holds the markup of one table row.
        chunks = str(table).split('<span class="w1">')
        print('已经爬取天地药材网页数', page + 1)
        for chunk in chunks[1:]:
            m_name = re.search(r'<a href="(.*?)" target="_blank" title="(.*?)">(.*?)</a></span>', chunk)
            m_spec = re.search(r'<span class="w2"><a href="(.*?)" target="_blank" title="(.*?)">(.*?)</a></span>', chunk)
            m_origin = re.search(r'<span class="w9" title="(.*?)">(.*?)</span>', chunk)
            m_market = re.search(r'<span class="w3">(.*?)</span>', chunk)
            m_price = re.search(r'<span class="w4">(.*?)</span>', chunk)
            m_week = re.search(r'<span class="w5"><em class="(.*?)">(.*?)</em></span>', chunk)
            m_month = re.search(r'<span class="w6"><em class="(.*?)">(.*?)</em></span>', chunk)
            m_year = re.search(r'<span class="w7"><em class="(.*?)">(.*?)</em></span>', chunk)
            fields = (m_name, m_spec, m_origin, m_market, m_price,
                      m_week, m_month, m_year)
            if any(m is None for m in fields):
                # Markup changed or partial row — skip it rather than
                # crashing the whole daily run with AttributeError.
                continue
            rows.append([
                m_name.group(2), m_spec.group(2), m_origin.group(2),
                m_market.group(1), m_price.group(1),
                m_week.group(2), m_month.group(2), m_year.group(2),
                collect_date, '天地药材网',
            ])
    return rows


def _scrape_yaotong(collect_date):
    """Scrape price rows from yt1998.com (药通网) paged JSON endpoint.

    Args:
        collect_date: 'YYYY-MM-DD' string stamped onto every row.

    Returns:
        List of 10-element rows in the same column order as _scrape_tiandi.
    """
    rows = []
    for page in range(245):
        # Random pause between pages (see note in _scrape_tiandi).
        time.sleep(random.uniform(1, 3))
        # The site expects a cache-busting "random=0.<digits>" query token.
        token = random.randint(1234567891234567, 9876543219876543)
        url = ("https://www.yt1998.com/price/nowDayPriceQ!getPriceList.do?random=0."
               + str(token)
               + "&ycnam=&market=&leibie=&istoday=&spices=&paramName=&paramValue=&pageIndex="
               + str(page) + "&pageSize=20")
        soup = BeautifulSoup(_fetch_html(url), "html.parser")
        # The payload is JSON-like text; entries are separated by '},{'.
        for entry in str(soup).split('},{'):
            m = _YT_ROW_RE.search(entry)
            if m is None:
                # Boundary fragment or changed schema — skip, don't crash.
                continue
            rows.append([
                m.group(13), m.group(14), m.group(5), m.group(6),
                m.group(12), m.group(9), m.group(1), m.group(11),
                collect_date, '药通网',
            ])
        print("已爬取药通网页数：", page + 1)
    return rows


def _append_rows(rows):
    """Append *rows* below the existing data in the shared workbook.

    Uses the modern openpyxl API (`wb.active`, `ws.append`) — the
    `get_sheet_names`/`get_sheet_by_name` calls in the original were removed
    from openpyxl 3.x. `ws.append` writes each row into the first empty row,
    matching the original max_row-based cell arithmetic.
    """
    wb = openpyxl.load_workbook(EXCEL_PATH)
    ws = wb.active  # first/active sheet, same sheet the original targeted
    print(ws.title)
    for row in rows:
        ws.append(row)
    wb.save(EXCEL_PATH)


def main():
    """Poll the clock hourly; at COLLECT_HOUR scrape both sites and persist."""
    while True:
        time_now = time.strftime("%H", time.localtime())  # refresh current hour
        print("开始检查现在的时间,时间为")
        print(time_now)
        if time_now == COLLECT_HOUR:
            print('开始采集')
            collect_date = datetime.datetime.now().strftime('%Y-%m-%d')
            _append_rows(_scrape_tiandi(collect_date))
            print("今天完成一次数据采集!")
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + " 采集完成")

            # Short breather between the two sites, as in the original.
            time.sleep(60)

            print('开始采集')
            collect_date = datetime.datetime.now().strftime('%Y-%m-%d')
            _append_rows(_scrape_yaotong(collect_date))
            print("今天完成一次数据采集!")
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + " 采集完成")

            print("一小时后再次检查")
            time.sleep(3600)  # by the next check the hour has advanced past 10
        else:
            print("未到收集时间，一小时后再次检查")
            time.sleep(3600)


if __name__ == "__main__":
    main()